## ******************************
##       C2.2 County analysis       
##       Hao Zhang & Ye Zhang
##            2025/7/24
## ******************************

# load packages
library(readstata13)
library(lfe)
library(stargazer)
library(tidyverse)
library(doBy)
library(data.table)
library(DescTools)
library(haven)

# set working directory
setwd("../raw data")

# read in data
options(scipen = 100)
firm <- read.dta13("cies 1998-2007.dta")
firm <- firm %>% mutate(city = as.numeric(substr(as.character(city), 0, 6)))

# ownership classification
firm$ownership <- as.numeric(firm$ownership)
firm <- firm %>% mutate(soe = ifelse(ownership == 110 | ownership == 151, 1, 0),
                        hmt = ifelse(ownership > 199 & ownership < 299, 1, 0),
                        fie = ifelse(ownership > 299, 1, 0), 
                        private = ifelse(soe == 0 & hmt == 0 & fie == 0, 1, 0))

# create total_insurance and rate
firm_base <- firm %>% select(city, year, frdm, wage) %>%
  filter(wage > 0) %>% mutate(year = year + 1)
firm <- firm %>% filter(insurance >= 0)
firm <- left_join(firm, firm_base, by = c("city", "year", "frdm"), relationship = "many-to-many")
firm <- firm %>% mutate(rate1 = insurance/wage.y, rate2 = medical/wage.y) %>% 
  mutate(wage = wage.x)
firm$rate1 <- Winsorize(firm$rate1, quantile(firm$rate1, probs = c(0.05, 0.95), na.rm = TRUE))
firm$rate2 <- Winsorize(firm$rate2, quantile(firm$rate2, probs = c(0.05, 0.95), na.rm = TRUE))

# across-year analysis
firm <- firm %>% mutate(insurance_dummy = ifelse(insurance == 0, 0, 1), 
                        medical_dummy = ifelse(medical == 0, 0, 1))

# create indicators and limit years
firm <- firm %>% mutate(sector = as.integer(industry/100)) %>% 
  filter(employment > 6, year >= 2001)

# clean and add county panel
county <- read.csv("county panel 1997-2018.csv") %>% filter(year >= 1997, year <= 2013) %>%
  select(year, county_num, jiedao_num, area, pop, gdp, budget, spending, ind_absc, fixed, medical_bed, welfare, welfare_bed, admcode) %>%
  filter(is.na(admcode) == FALSE)
county_base <- county %>% select(year, admcode, gdp) %>% mutate(year = year + 1)
county <- left_join(county, county_base, by = c("year", "admcode"), relationship = "many-to-many")
county$gdpgrowth <- county$gdp.x/county$gdp.y - 1

county <- county %>% mutate(city = as.integer(admcode/100)) %>% 
  group_by(city, year) %>%
  mutate(gdpgrowth_mean = mean(gdpgrowth, na.rm = TRUE)) %>% 
  mutate(year = year + 1) %>% filter(gdpgrowth >= -1, gdpgrowth <= 1) %>% 
  mutate(gdp_pressure = gdpgrowth_mean - gdpgrowth)

# add variables
county$citycode <- county$admcode
county <- county %>% filter()
firm <- firm %>% mutate(citycode = city)
firm <- left_join(firm, county, by = c("citycode", "year"), relationship = "many-to-many")

# FIRM LEVEL ANALYSIS FOR COUNTY COMPETITION
rm(list=setdiff(ls(), "firm"))

# drop province-level cities
firm <- firm %>% mutate(province = as.integer(admcode/10000)) 
firm <- firm %>% filter(province != 11, province != 12, province != 31, citycode != 5102)

# create variables
firm <- firm %>% mutate(deficit = spending - budget) 

firm <- firm %>% filter(year >= 2000) %>%
  mutate(tax = ifelse(province == 33 & year >= 2004 | province == 13 & year >= 2002 | 
                        province == 42 & year >= 2002 | province == 44 & year >= 2002 | 
                        province == 32 & year >= 2005 | province == 53 & year >= 2006, 1, 0))

# run inverse hyperbolic sine
ihs <- function(x) {
  y <- log(x + sqrt(x ^ 2 + 1))
  return(y)
}

# unemployment insurance
m1 <- felm(insurance_dummy ~ gdp_pressure + ihs(gdpgrowth) + 
             log(gdp.x/pop) + ihs(deficit/gdp.x) + ihs(employment) +ihs(output1) +
             ihs(profit) + ihs(wage.x) + county_num + I(county_num^2) | frdm + year + tax | 0 | city.x, data = firm)

firm1 <- firm %>% filter(rate1 > 0)
m2 <- felm(log(rate1) ~ gdp_pressure + ihs(gdpgrowth) + 
             log(gdp.x/pop) + ihs(deficit/gdp.x) + ihs(employment) +ihs(output1) +
             ihs(profit) + ihs(wage.x) + county_num + I(county_num^2) | frdm + year + tax | 0 | city.x, data = firm1)

m3 <- felm(medical_dummy ~ gdp_pressure + ihs(gdpgrowth) + 
             log(gdp.x/pop) + ihs(deficit/gdp.x) + ihs(employment) +ihs(output1) +
             ihs(profit) + ihs(wage.x) + county_num + I(county_num^2) | frdm + year + tax | 0 | city.x, data = firm)

firm2 <- firm %>% filter(rate2 > 0)
m4 <- felm(log(rate2) ~ gdp_pressure + ihs(gdpgrowth) + 
             log(gdp.x/pop) + ihs(deficit/gdp.x) + ihs(employment) +ihs(output1) +
             ihs(profit) + ihs(wage.x) + county_num + I(county_num^2) | frdm + year + tax | 0 | city.x, data = firm2)

# output regression table
stargazer(m1, m2, m3, m4, title = "Inter-County Competition and Social Insurances", 
          dep.var.labels = c("Unemployment", "Unemployment", "Health and Pension", "Health and Pension"),
          omit.stat = c("rsq", "adj.rsq", "ser"), 
          font.size = "small", 
          add.lines = list(c("Mayor Characteristics", "Y", "Y", "Y", "Y"),
                           c("Firm Characteristics", "Y", "Y", "Y", "Y"), 
                           c("Firm Fixed Effects", "Y", "Y", "Y", "Y"), 
                           c("Year Fixed Effects", "Y", "Y", "Y", "Y")))
